import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
from shapely.geometry import mapping
import geopandas as gpd
import os # we need os to do some basic file operations
sentinal_fp = "../RandomForest/data_a/sentinel/"
# find every file in the sentinal_fp directory
sentinal_band_paths = [os.path.join(sentinal_fp, f) for f in os.listdir(sentinal_fp) if os.path.isfile(os.path.join(sentinal_fp, f))]
sentinal_band_paths.sort()
sentinal_band_paths
['../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B02_10m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B03_10m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B04_10m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B05_20m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B06_20m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B07_20m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B08_10m_extent.tif', '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B8A_20m_extent.tif']
file_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'
# Load the dataset
data = gpd.read_file(file_path)
data
| country | fua_name | fua_code | code_2018 | class_2018 | prod_date | identifier | perimeter | area | comment | Pop2018 | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 425-DE033L1 | 155.888916 | 1185.135544 | None | 9 | MULTIPOLYGON (((4387596.158 2801154.636, 43875... |
| 1 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 785-DE033L1 | 204.742805 | 2617.844445 | None | 53 | MULTIPOLYGON (((4385599.462 2805744.357, 43856... |
| 2 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 751-DE033L1 | 397.305451 | 9451.982988 | None | 193 | MULTIPOLYGON (((4389167.596 2805621.979, 43890... |
| 3 | DE | Augsburg | DE033L1 | 11210 | Discontinuous dense urban fabric (S.L. : 50% -... | 2020-09 | 5156-DE033L1 | 2683.734912 | 35662.775997 | None | 166 | MULTIPOLYGON (((4394320.820 2808000.000, 43943... |
| 4 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 1986-DE033L1 | 297.148595 | 4588.045376 | None | 60 | MULTIPOLYGON (((4389395.144 2807242.564, 43893... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 25378 | DE | Augsburg | DE033L1 | 31000 | Forests | 2020-09 | 24837-DE033L1 | 682.883091 | 27090.306391 | None | 0 | MULTIPOLYGON (((4411181.690 2815311.223, 44111... |
| 25379 | DE | Augsburg | DE033L1 | 32000 | Herbaceous vegetation associations (natural gr... | 2020-09 | 25099-DE033L1 | 485.354917 | 11090.189969 | None | 0 | MULTIPOLYGON (((4397210.659 2824962.333, 43972... |
| 25380 | DE | Augsburg | DE033L1 | 32000 | Herbaceous vegetation associations (natural gr... | 2020-09 | 25134-DE033L1 | 528.446541 | 11505.819504 | None | 0 | MULTIPOLYGON (((4397105.659 2832772.333, 43971... |
| 25381 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25235-DE033L1 | 568.167123 | 10591.167983 | None | 0 | MULTIPOLYGON (((4388260.684 2807000.000, 43882... |
| 25382 | DE | Augsburg | DE033L1 | 23000 | Pastures | 2020-09 | 23246-DE033L1 | 763.153970 | 18744.576748 | None | 0 | MULTIPOLYGON (((4391115.659 2828972.333, 43911... |
25383 rows × 12 columns
data.plot()
<Axes: >
# create a products directory within the data dir which won't be uploaded to Github
img_dir = '../RandomForest/data_a/products2/'
# check to see if the dir it exists, if not, create it
if not os.path.exists(img_dir):
os.makedirs(img_dir)
# filepath for image we're writing out
img_fp = img_dir + 'sentinel_bands.tif'
# Read metadata of first file and assume all other bands are the same
with rasterio.open(sentinal_band_paths[0]) as src0:
meta = src0.meta
# Update metadata to reflect the number of layers
meta.update(count = len(sentinal_band_paths))
# Read each layer and write it to stack
with rasterio.open(img_fp, 'w', **meta) as dst:
for id, layer in enumerate(sentinal_band_paths, start=1):
with rasterio.open(layer) as src1:
dst.write_band(id, src1.read(1))
--------------------------------------------------------------------------- CPLE_AppDefinedError Traceback (most recent call last) Cell In[66], line 19 16 meta.update(count = len(sentinal_band_paths)) 18 # Read each layer and write it to stack ---> 19 with rasterio.open(img_fp, 'w', **meta) as dst: 20 for id, layer in enumerate(sentinal_band_paths, start=1): 21 with rasterio.open(layer) as src1: File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\env.py:451, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds) 448 session = DummySession() 450 with env_ctor(session=session): --> 451 return f(*args, **kwds) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\__init__.py:314, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs) 312 writer = get_writer_for_driver(driver) 313 if writer is not None: --> 314 dataset = writer( 315 path, 316 mode, 317 driver=driver, 318 width=width, 319 height=height, 320 count=count, 321 crs=crs, 322 transform=transform, 323 dtype=dtype, 324 nodata=nodata, 325 sharing=sharing, 326 **kwargs 327 ) 328 else: 329 raise DriverCapabilityError( 330 "Writer does not exist for driver: %s" % str(driver) 331 ) File rasterio\_io.pyx:1430, in rasterio._io.DatasetWriterBase.__init__() File rasterio\_io.pyx:293, in rasterio._io._delete_dataset_if_exists() File rasterio\_err.pyx:195, in rasterio._err.exc_wrap_int() CPLE_AppDefinedError: Deleting ../RandomForest/data_a/products2/sentinel_bands.tif failed: Permission denied
full_dataset = rasterio.open(img_fp)
img_rows, img_cols = full_dataset.shape
img_bands = full_dataset.count
print(full_dataset.shape) # dimensions
print(full_dataset.count) # bands
(2197, 1478) 8
full_dataset
<open DatasetReader name='../RandomForest/data_a/products2/sentinel_bands.tif' mode='r'>
import numpy as np
from rasterio.plot import show
import matplotlib.pyplot as plt
img_fp = '../RandomForest/data_a/products2/sentinel_bands_proj.tif'
# Open the full extent of the raster dataset
with rasterio.open(img_fp) as dataset:
# Read the specified bands (3, 2, 1 for RGB)
img_data = dataset.read([3, 2, 1])
# Assuming you've already read the img_data
# Scale the data to 0-255 range, assuming it's 16-bit
img_data = np.clip(img_data / 4096 * 255, 0, 255).astype(np.uint8)
# Mask out no data values if necessary
no_data_value = -9999 # Replace with your actual no data value
img_data[img_data == no_data_value] = 0 # Set no data values to 0 for display
# Now display the image
fig, ax = plt.subplots(figsize=(10, 7))
show(img_data, ax=ax, transform=dataset.transform)
plt.show()
full_dataset = rasterio.open(img_fp)
raster_crs = full_dataset.crs
from rasterio.warp import calculate_default_transform, reproject, Resampling
# Define the source and destination coordinate reference systems
src_crs = 'EPSG:32632'
dst_crs = 'EPSG:4326'
# Open the original dataset
with rasterio.open('../RandomForest/data_a/products/sentinel_bands.tif') as src:
# Calculate the transform and dimensions for the new dataset
transform, width, height = calculate_default_transform(
src.crs, dst_crs, src.width, src.height, *src.bounds)
# Define the metadata for the new dataset
kwargs = src.meta.copy()
kwargs.update({
'crs': dst_crs,
'transform': transform,
'width': width,
'height': height
})
# Create the new dataset and reproject the original data into it
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif', 'w', **kwargs) as dst:
for i in range(1, src.count + 1): # Loop through all bands
reproject(
source=rasterio.band(src, i),
destination=rasterio.band(dst, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=dst_crs,
resampling=Resampling.nearest)
shapefile = gpd.read_file('C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg')
shapefile.crs
<Projected CRS: EPSG:3035> Name: ETRS89-extended / LAEA Europe Axis Info [cartesian]: - Y[north]: Northing (metre) - X[east]: Easting (metre) Area of Use: - name: Europe - European Union (EU) countries and candidates. Europe - onshore and offshore: Albania; Andorra; Austria; Belgium; Bosnia and Herzegovina; Bulgaria; Croatia; Cyprus; Czechia; Denmark; Estonia; Faroe Islands; Finland; France; Germany; Gibraltar; Greece; Hungary; Iceland; Ireland; Italy; Kosovo; Latvia; Liechtenstein; Lithuania; Luxembourg; Malta; Monaco; Montenegro; Netherlands; North Macedonia; Norway including Svalbard and Jan Mayen; Poland; Portugal including Madeira and Azores; Romania; San Marino; Serbia; Slovakia; Slovenia; Spain including Canary Islands; Sweden; Switzerland; Türkiye (Turkey); United Kingdom (UK) including Channel Islands and Isle of Man; Vatican City State. - bounds: (-35.58, 24.6, 44.83, 84.73) Coordinate Operation: - name: Europe Equal Area 2001 - method: Lambert Azimuthal Equal Area Datum: European Terrestrial Reference System 1989 ensemble - Ellipsoid: GRS 1980 - Prime Meridian: Greenwich
shapefile = shapefile.to_crs(epsg=32632)
shapefile.crs
<Projected CRS: EPSG:32632> Name: WGS 84 / UTM zone 32N Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 6°E and 12°E, northern hemisphere between equator and 84°N, onshore and offshore. Algeria. Austria. Cameroon. Denmark. Equatorial Guinea. France. Gabon. Germany. Italy. Libya. Liechtenstein. Monaco. Netherlands. Niger. Nigeria. Norway. Sao Tome and Principe. Svalbard. Sweden. Switzerland. Tunisia. Vatican City State. - bounds: (6.0, 0.0, 12.0, 84.0) Coordinate Operation: - name: UTM zone 32N - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
shapefile
| country | fua_name | fua_code | code_2018 | class_2018 | prod_date | identifier | perimeter | area | comment | Pop2018 | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 425-DE033L1 | 155.888916 | 1185.135544 | None | 9 | MULTIPOLYGON (((640671.445 5353635.798, 640668... |
| 1 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 785-DE033L1 | 204.742805 | 2617.844445 | None | 53 | MULTIPOLYGON (((638616.901 5358201.245, 638618... |
| 2 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 751-DE033L1 | 397.305451 | 9451.982988 | None | 193 | MULTIPOLYGON (((642184.268 5358123.727, 642109... |
| 3 | DE | Augsburg | DE033L1 | 11210 | Discontinuous dense urban fabric (S.L. : 50% -... | 2020-09 | 5156-DE033L1 | 2683.734912 | 35662.775997 | None | 166 | MULTIPOLYGON (((647303.556 5360566.887, 647300... |
| 4 | DE | Augsburg | DE033L1 | 11100 | Continuous urban fabric (S.L. : > 80%) | 2020-09 | 1986-DE033L1 | 297.148595 | 4588.045376 | None | 60 | MULTIPOLYGON (((642390.773 5359747.470, 642356... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 25378 | DE | Augsburg | DE033L1 | 31000 | Forests | 2020-09 | 24837-DE033L1 | 682.883091 | 27090.306391 | None | 0 | MULTIPOLYGON (((664060.510 5368090.340, 664059... |
| 25379 | DE | Augsburg | DE033L1 | 32000 | Herbaceous vegetation associations (natural gr... | 2020-09 | 25099-DE033L1 | 485.354917 | 11090.189969 | None | 0 | MULTIPOLYGON (((649972.813 5377568.573, 649975... |
| 25380 | DE | Augsburg | DE033L1 | 32000 | Herbaceous vegetation associations (natural gr... | 2020-09 | 25134-DE033L1 | 528.446541 | 11505.819504 | None | 0 | MULTIPOLYGON (((649766.914 5385378.476, 649771... |
| 25381 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25235-DE033L1 | 568.167123 | 10591.167983 | None | 0 | MULTIPOLYGON (((641260.177 5359490.598, 641256... |
| 25382 | DE | Augsburg | DE033L1 | 23000 | Pastures | 2020-09 | 23246-DE033L1 | 763.153970 | 18744.576748 | None | 0 | MULTIPOLYGON (((643829.528 5381502.382, 643827... |
25383 rows × 12 columns
shapefile.groupby('code_2018')['class_2018'].unique()
code_2018 11100 [Continuous urban fabric (S.L. : > 80%)] 11210 [Discontinuous dense urban fabric (S.L. : 50% ... 11220 [Discontinuous medium density urban fabric (S.... 11230 [Discontinuous low density urban fabric (S.L. ... 11240 [Discontinuous very low density urban fabric (... 11300 [Isolated structures] 12100 [Industrial, commercial, public, military and ... 12210 [Fast transit roads and associated land] 12220 [Other roads and associated land] 12230 [Railways and associated land] 12400 [Airports] 13100 [Mineral extraction and dump sites] 13300 [Construction sites] 13400 [Land without current use] 14100 [Green urban areas] 14200 [Sports and leisure facilities] 21000 [Arable land (annual crops)] 23000 [Pastures] 31000 [Forests] 32000 [Herbaceous vegetation associations (natural g... 50000 [Water] Name: class_2018, dtype: object
shapefile = shapefile.to_crs({'init': 'epsg:4326'})
shapefile.crs
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 in_crs_string = _prepare_from_proj_string(in_crs_string)
<Geographic 2D CRS: +init=epsg:4326 +type=crs> Name: WGS 84 Axis Info [ellipsoidal]: - lon[east]: Longitude (degree) - lat[north]: Latitude (degree) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
len(shapefile)
25383
# this generates a list of shapely geometries
geoms = shapefile.geometry.values
# let's grab a single shapely geometry to check
geometry = geoms[0]
print(type(geometry))
print(geometry)
# transform to GeoJSON format
from shapely.geometry import mapping
feature = [mapping(geometry)] # can also do this using polygon.__geo_interface__
print(type(feature))
print(feature)
<class 'shapely.geometry.multipolygon.MultiPolygon'>
MULTIPOLYGON (((10.897603435546376 48.32024993125257, 10.897558639391013 48.32006889000404, 10.896796603606004 48.32015034274349, 10.896843224431512 48.32033682294202, 10.897603435546376 48.32024993125257)))
<class 'list'>
[{'type': 'MultiPolygon', 'coordinates': [(((10.897603435546376, 48.32024993125257), (10.897558639391013, 48.32006889000404), (10.896796603606004, 48.32015034274349), (10.896843224431512, 48.32033682294202), (10.897603435546376, 48.32024993125257)),)]}]
out_image, out_transform = mask(full_dataset, feature, crop=True)
out_image.shape
(8, 3, 8)
full_dataset.close()
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box
# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)
# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
shapefile = shapefile.to_crs(raster_crs)
# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
raster_bounds = src.bounds
raster_bbox = box(*raster_bounds) # Create a bounding box from the raster bounds
# Only proceed if the raster and shapefile overlap
if not shapefile.unary_union.intersects(raster_bbox):
raise ValueError("Shapefile and raster do not overlap")
else:
print("The shapefile and raster overlap.")
# Extract the raster values within the polygon
for geom in shapefile.geometry:
feature = [mapping(geom)]
out_image, out_transform = mask(src, feature, crop=True)
# process out_image
The shapefile and raster overlap.
--------------------------------------------------------------------------- WindowError Traceback (most recent call last) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:80, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width) 79 try: ---> 80 window = geometry_window(dataset, shapes, pad_x=pad_x, pad_y=pad_y) 82 except WindowError: 83 # If shapes do not overlap raster, raise Exception or UserWarning 84 # depending on value of crop File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\features.py:477, in geometry_window(dataset, shapes, pad_x, pad_y, north_up, rotated, pixel_precision, boundless) 476 if not boundless: --> 477 window = window.intersection(raster_window) 479 return window File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:775, in Window.intersection(self, other) 763 """Return the intersection of this window and another 764 765 Parameters (...) 773 Window 774 """ --> 775 return intersection([self, other]) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:125, in iter_args.<locals>.wrapper(*args, **kwargs) 124 if len(args) == 1 and isinstance(args[0], Iterable): --> 125 return function(*args[0]) 126 else: File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:239, in intersection(*windows) 226 """Innermost extent of window intersections. 227 228 Will raise WindowError if windows do not intersect. (...) 237 Window 238 """ --> 239 return functools.reduce(_intersection, windows) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:257, in _intersection(w1, w2) 256 else: --> 257 raise WindowError(f"Intersection is empty {w1} {w2}") WindowError: Intersection is empty Window(col_off=2116, row_off=722, width=20, height=41) Window(col_off=0, row_off=0, width=1968, height=1913) During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) Cell In[81], line 33 31 for geom in shapefile.geometry: 32 feature = [mapping(geom)] ---> 33 out_image, out_transform = mask(src, feature, crop=True) 34 # process out_image File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:178, in mask(dataset, shapes, all_touched, invert, nodata, filled, crop, pad, pad_width, indexes) 175 else: 176 nodata = 0 --> 178 shape_mask, transform, window = raster_geometry_mask( 179 dataset, shapes, all_touched=all_touched, invert=invert, crop=crop, 180 pad=pad, pad_width=pad_width) 182 if indexes is None: 183 out_shape = (dataset.count, ) + shape_mask.shape File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:86, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width) 82 except WindowError: 83 # If shapes do not overlap raster, raise Exception or UserWarning 84 # depending on value of crop 85 if crop: ---> 86 raise ValueError('Input shapes do not overlap raster.') 87 else: 88 warnings.warn('shapes are outside bounds of raster. ' 89 'Are they in different coordinate reference systems?') ValueError: Input shapes do not overlap raster.
Adjust Landcover Data to match the scene¶
# Reproject the shapefile to match the raster's CRS
shapefile = shapefile.to_crs(epsg=32632)
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
print(src.bounds)
print(shapefile.total_bounds)
BoundingBox(left=10.7543531803083, bottom=48.25329694782757, right=10.97190533912122, top=48.46476914285253) [ 610820.72860213 5327718.5402929 670858.48189483 5388978.22555986]
shapefile = shapefile.to_crs("EPSG:4326")
print(shapefile.total_bounds)
[10.494824 48.0892135 11.3108095 48.6385815]
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot
fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
rasterio.plot.show(src, ax=ax)
shapefile.plot(ax=ax, facecolor='none', edgecolor='red')
plt.show()
from shapely.geometry import box
from shapely.affinity import scale
shapefile_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'
# Open the raster file and get its bounds
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
bounds = src.bounds
# Create a slightly smaller bounding box geometry from the raster bounds
# Reduce each side of the bounding box by a small fraction, e.g., 0.99 to reduce by 1%
smaller_bbox = scale(box(bounds.left, bounds.bottom, bounds.right, bounds.top), xfact=0.99, yfact=0.99, origin='center')
# Create a GeoDataFrame from the smaller bounding box
smaller_bbox_gdf = gpd.GeoDataFrame({'geometry': [smaller_bbox]}, crs=src.crs)
# Load the shapefile
shapefile = gpd.read_file(shapefile_path)
# Make sure the shapefile is in the same CRS as the raster
shapefile = shapefile.to_crs(src.crs)
# Perform a spatial join to find features completely within the smaller bounding box
within_shapefile = gpd.sjoin(shapefile, smaller_bbox_gdf, op='within')
# Save the clipped shapefile to a new file
within_shapefile.to_file('within_shapefile_smaller_bbox.gpkg', driver='GPKG')
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\IPython\core\interactiveshell.py:3466: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead. if await self.run_code(code, result, async_=asy):
within_shapefile = within_shapefile.groupby('class_2018', group_keys=False).apply(lambda x: x.sample(n=10, replace=True))
within_shapefile
| country | fua_name | fua_code | code_2018 | class_2018 | prod_date | identifier | perimeter | area | comment | Pop2018 | geometry | index_right | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15079 | DE | Augsburg | DE033L1 | 12400 | Airports | 2020-09 | 15062-DE033L1 | 5540.286317 | 1.073691e+06 | None | 0 | MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... | 0 |
| 15079 | DE | Augsburg | DE033L1 | 12400 | Airports | 2020-09 | 15062-DE033L1 | 5540.286317 | 1.073691e+06 | None | 0 | MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... | 0 |
| 15079 | DE | Augsburg | DE033L1 | 12400 | Airports | 2020-09 | 15062-DE033L1 | 5540.286317 | 1.073691e+06 | None | 0 | MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... | 0 |
| 15079 | DE | Augsburg | DE033L1 | 12400 | Airports | 2020-09 | 15062-DE033L1 | 5540.286317 | 1.073691e+06 | None | 0 | MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... | 0 |
| 15079 | DE | Augsburg | DE033L1 | 12400 | Airports | 2020-09 | 15062-DE033L1 | 5540.286317 | 1.073691e+06 | None | 0 | MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 22917 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25255-DE033L1 | 1521.883149 | 1.113501e+04 | None | 0 | MULTIPOLYGON (((10.87805 48.35996, 10.87788 48... | 0 |
| 24755 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25380-DE033L1 | 324.841752 | 2.014562e+03 | None | 0 | MULTIPOLYGON (((10.89835 48.38594, 10.89837 48... | 0 |
| 24697 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25314-DE033L1 | 1389.142043 | 1.032410e+05 | None | 0 | MULTIPOLYGON (((10.92348 48.44506, 10.92348 48... | 0 |
| 5884 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25293-DE033L1 | 406.403219 | 1.023780e+04 | None | 0 | MULTIPOLYGON (((10.84367 48.45584, 10.84332 48... | 0 |
| 22773 | DE | Augsburg | DE033L1 | 50000 | Water | 2020-09 | 25236-DE033L1 | 497.513898 | 5.422245e+03 | None | 0 | MULTIPOLYGON (((10.87693 48.37054, 10.87655 48... | 0 |
210 rows × 13 columns
within_shapefile.explore()
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box
# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)
shapefile = within_shapefile
# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
shapefile = shapefile.to_crs(raster_crs)
# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
raster_bounds = src.bounds
raster_bbox = box(*raster_bounds) # Create a bounding box from the raster bounds
# Only proceed if the raster and shapefile overlap
if not shapefile.unary_union.intersects(raster_bbox):
raise ValueError("Shapefile and raster do not overlap")
else:
print("The shapefile and raster overlap.")
# Extract the raster values within the polygon
for geom in shapefile.geometry:
feature = [mapping(geom)]
out_image, out_transform = mask(src, feature, crop=True)
# process out_image
The shapefile and raster overlap.
import numpy as np
# Initialize a list to hold the mean values for each band
band_means = []
# Extract the raster values within the polygon
for geom in shapefile.geometry:
feature = [mapping(geom)]
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
out_image, out_transform = mask(src, feature, crop=True)
# Calculate mean of each band, excluding no-data values
means = np.ma.array(out_image, mask=out_image == src.nodata).mean(axis=(1, 2))
band_means.append(means.filled(src.nodata))
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
raster_crs = src.crs
shapefile_crs = shapefile.crs
print("Raster CRS: ", raster_crs)
print("Shapefile CRS: ", shapefile_crs)
Raster CRS: EPSG:4326 Shapefile CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
raster_bounds = src.bounds
# Use the total_bounds attribute of the shapefile to get its bounding coordinates
shapefile_bounds = shapefile.total_bounds
print("Raster bounds: ", raster_bounds)
print("Shapefile bounds: ", shapefile_bounds)
Raster bounds: BoundingBox(left=10.7543531803083, bottom=48.25329694782757, right=10.97190533912122, top=48.46476914285253) Shapefile bounds: [10.75694347 48.25553441 10.96999023 48.4636161 ]
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot
# Open the raster file
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
# Plot the raster
fig, ax = plt.subplots()
rasterio.plot.show(src, ax=ax)
# Overlay the shapefile
shapefile.plot(ax=ax, color='red', alpha=0.5)
plt.show()
import numpy as np
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.features import geometry_mask
img_fp = '../RandomForest/data_a/products/sentinel_bands_proj.tif'
X = np.array([], dtype=np.float32).reshape(0, 8) # Replace '8' with the actual number of bands if different
y = [] # Initialize y as an empty list
incomplete_features = []
with rasterio.open(img_fp) as src:
raster_crs = src.crs
nodata = src.nodatavals[0] # Assuming all bands have the same nodata value
band_count = src.count
# Iterate over the geometries in the shapefile
for index, row in shapefile.iterrows():
geom = row['geometry']
# Create a mask for the current geometry
geom_mask = geometry_mask([geom], transform=src.transform, invert=True, out_shape=(src.height, src.width))
# Check if the geometry intersects with the raster
if not geom_mask.all():
out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata)
out_image_masked = np.ma.masked_equal(out_image, nodata)
valid_data = out_image_masked.compressed()
# Ensure we have a complete set of pixels for all bands
if valid_data.size % band_count == 0:
out_image_reshaped = valid_data.reshape(-1, band_count)
# Remove rows that contain nodata values
valid_rows = np.all(out_image_reshaped != nodata, axis=1)
out_image_reshaped = out_image_reshaped[valid_rows]
# Only proceed if we have valid data left after removing nodata rows
if out_image_reshaped.size > 0:
X = np.vstack((X, out_image_reshaped))
y.extend([row['class_2018']] * out_image_reshaped.shape[0])
else:
# Collect indices of incomplete features to handle later
incomplete_features.append(index)
print(f"Feature {index} does not have a complete set of pixels.")
# Handle incomplete features as needed
# For example, you might want to drop them from the shapefile
shapefile = shapefile.drop(incomplete_features)
Feature 5525 does not have a complete set of pixels. Feature 8841 does not have a complete set of pixels. Feature 2792 does not have a complete set of pixels. Feature 23790 does not have a complete set of pixels. Feature 24478 does not have a complete set of pixels. Feature 24478 does not have a complete set of pixels. Feature 5859 does not have a complete set of pixels. Feature 7959 does not have a complete set of pixels. Feature 12454 does not have a complete set of pixels. Feature 11810 does not have a complete set of pixels. Feature 10615 does not have a complete set of pixels. Feature 12162 does not have a complete set of pixels. Feature 13507 does not have a complete set of pixels. Feature 15178 does not have a complete set of pixels. Feature 23246 does not have a complete set of pixels. Feature 16438 does not have a complete set of pixels.
print(shapefile)
country fua_name fua_code code_2018 class_2018 prod_date \
15079 DE Augsburg DE033L1 12400 Airports 2020-09
15079 DE Augsburg DE033L1 12400 Airports 2020-09
15079 DE Augsburg DE033L1 12400 Airports 2020-09
15079 DE Augsburg DE033L1 12400 Airports 2020-09
15079 DE Augsburg DE033L1 12400 Airports 2020-09
... ... ... ... ... ... ...
22917 DE Augsburg DE033L1 50000 Water 2020-09
24755 DE Augsburg DE033L1 50000 Water 2020-09
24697 DE Augsburg DE033L1 50000 Water 2020-09
5884 DE Augsburg DE033L1 50000 Water 2020-09
22773 DE Augsburg DE033L1 50000 Water 2020-09
identifier perimeter area comment Pop2018 \
15079 15062-DE033L1 5540.286317 1.073691e+06 None 0
15079 15062-DE033L1 5540.286317 1.073691e+06 None 0
15079 15062-DE033L1 5540.286317 1.073691e+06 None 0
15079 15062-DE033L1 5540.286317 1.073691e+06 None 0
15079 15062-DE033L1 5540.286317 1.073691e+06 None 0
... ... ... ... ... ...
22917 25255-DE033L1 1521.883149 1.113501e+04 None 0
24755 25380-DE033L1 324.841752 2.014562e+03 None 0
24697 25314-DE033L1 1389.142043 1.032410e+05 None 0
5884 25293-DE033L1 406.403219 1.023780e+04 None 0
22773 25236-DE033L1 497.513898 5.422245e+03 None 0
geometry index_right valid
15079 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0 True
15079 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0 True
15079 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0 True
15079 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0 True
15079 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0 True
... ... ... ...
22917 MULTIPOLYGON (((10.87805 48.35996, 10.87788 48... 0 True
24755 MULTIPOLYGON (((10.89835 48.38594, 10.89837 48... 0 True
24697 MULTIPOLYGON (((10.92348 48.44506, 10.92348 48... 0 True
5884 MULTIPOLYGON (((10.84367 48.45584, 10.84332 48... 0 True
22773 MULTIPOLYGON (((10.87693 48.37054, 10.87655 48... 0 True
[194 rows x 14 columns]
# Initialize empty arrays for the pixel values and labels
X = np.array([], dtype=np.float32).reshape(0, 8) # Replace '8' with the number of bands
y = []
with rasterio.open(img_fp) as src:
# Ensure the shapefile is in the same CRS as the raster
shapefile = shapefile.to_crs(src.crs)
nodata = src.nodatavals[0] # Assuming all bands have the same nodata value
# Loop through each feature in the shapefile
for index, row in shapefile.iterrows():
geom = row.geometry
# Mask the raster with the geometry
out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata, filled=False)
# Check if there is any valid data
if np.any(out_image.mask == False):
# Reshape and append to X and y
valid_pixels = out_image.data[~out_image.mask].reshape(-1, src.count)
X = np.vstack((X, valid_pixels))
y.extend([row['class_2018']] * valid_pixels.shape[0])
out_image
masked_array(
data=[[[--, --, --, 238.0],
[--, --, --, 196.0],
[--, --, 306.0, 175.0],
[--, 241.0, 183.0, 183.0],
[--, 202.0, 175.0, 175.0],
[--, 194.0, 160.0, 160.0],
[--, 215.0, 169.0, 169.0],
[--, 210.0, 161.0, 161.0],
[--, 192.0, 157.0, 157.0],
[--, 187.0, 181.0, 181.0],
[--, 200.0, 218.0, 218.0],
[--, 178.0, 189.0, 314.0],
[--, 228.0, 197.0, 217.0],
[--, 266.0, 196.0, 195.0],
[--, 319.0, 252.0, 256.0],
[--, 302.0, 280.0, 295.0],
[--, 313.0, 301.0, 280.0],
[--, 709.0, 414.0, 227.0],
[1372.0, 1374.0, 1366.0, 1244.0],
[--, --, --, --]],
[[--, --, --, 661.0],
[--, --, --, 644.0],
[--, --, 680.0, 709.0],
[--, 688.0, 588.0, 588.0],
[--, 857.0, 905.0, 905.0],
[--, 686.0, 685.0, 685.0],
[--, 874.0, 987.0, 987.0],
[--, 942.0, 1188.0, 1188.0],
[--, 1178.0, 1114.0, 1114.0],
[--, 1216.0, 920.0, 920.0],
[--, 1122.0, 720.0, 720.0],
[--, 987.0, 741.0, 673.0],
[--, 873.0, 744.0, 684.0],
[--, 1118.0, 1152.0, 1172.0],
[--, 1152.0, 1162.0, 1230.0],
[--, 1232.0, 1194.0, 1196.0],
[--, 1254.0, 1144.0, 1128.0],
[--, 517.0, 934.0, 822.0],
[385.0, 490.0, 1014.0, 936.0],
[--, --, --, --]],
[[--, --, --, 608.0],
[--, --, --, 582.0],
[--, --, 501.0, 525.0],
[--, 451.0, 626.0, 626.0],
[--, 861.0, 891.0, 891.0],
[--, 514.0, 681.0, 681.0],
[--, 705.0, 991.0, 991.0],
[--, 959.0, 1242.0, 1242.0],
[--, 1216.0, 1216.0, 1216.0],
[--, 1178.0, 958.0, 958.0],
[--, 1080.0, 628.0, 628.0],
[--, 915.0, 572.0, 618.0],
[--, 1008.0, 686.0, 615.0],
[--, 1226.0, 1126.0, 1162.0],
[--, 1274.0, 1282.0, 1280.0],
[--, 1432.0, 1310.0, 1310.0],
[--, 1322.0, 1208.0, 1146.0],
[--, 420.0, 946.0, 606.0],
[281.0, 344.0, 1108.0, 764.0],
[--, --, --, --]],
[[--, --, --, 1070.375],
[--, --, --, 1070.5],
[--, --, 1004.0, 1089.5],
[--, 1074.125, 1123.375, 1123.375],
[--, 1168.375, 1172.125, 1172.125],
[--, 1203.3125, 1251.4375, 1251.4375],
[--, 1215.0625, 1290.1875, 1290.1875],
[--, 1246.6875, 1331.0625, 1331.0625],
[--, 1271.8125, 1343.9375, 1343.9375],
[--, 1290.4375, 1328.8125, 1328.8125],
[--, 1321.875, 1316.625, 1316.625],
[--, 1394.9375, 1301.8125, 1276.9375],
[--, 1408.3125, 1299.9375, 1279.3125],
[--, 1409.9375, 1333.3125, 1338.5625],
[--, 1399.8125, 1401.9375, 1454.6875],
[--, 1334.0, 1408.5, 1503.75],
[--, 1212.5, 1353.0, 1485.75],
[--, 924.3125, 1214.4375, 1395.75],
[687.25, 879.0, 1199.0, 1376.0],
[--, --, --, --]],
[[--, --, --, 2334.125],
[--, --, --, 2360.25],
[--, --, 2501.75, 2443.25],
[--, 2594.375, 2509.625, 2509.625],
[--, 2666.625, 2559.375, 2559.375],
[--, 2520.3125, 2444.9375, 2444.9375],
[--, 2360.875, 2288.125, 2288.125],
[--, 2163.625, 2067.375, 2067.375],
[--, 2069.875, 2017.125, 2017.125],
[--, 2079.625, 2137.375, 2137.375],
[--, 2114.4375, 2230.3125, 2230.3125],
[--, 2150.75, 2286.75, 2326.625],
[--, 2043.75, 2202.75, 2281.875],
[--, 1904.375, 2047.125, 2139.0],
[--, 1732.625, 1819.875, 1898.0],
[--, 1719.9375, 1798.3125, 1880.6875],
[--, 1866.3125, 1982.4375, 2087.0625],
[--, 2073.0, 2304.0, 2476.5625],
[2171.0625, 2252.9375, 2436.8125, 2577.5625],
[--, --, --, --]],
[[--, --, --, 2750.125],
[--, --, --, 2765.75],
[--, --, 2926.75, 2828.75],
[--, 3039.3125, 2901.4375, 2901.4375],
[--, 3140.4375, 2983.8125, 2983.8125],
[--, 2915.1875, 2788.5625, 2788.5625],
[--, 2725.75, 2602.75, 2602.75],
[--, 2530.75, 2388.75, 2388.75],
[--, 2408.375, 2337.125, 2337.125],
[--, 2358.625, 2447.875, 2447.875],
[--, 2370.9375, 2542.3125, 2542.3125],
[--, 2396.8125, 2594.9375, 2631.8125],
[--, 2225.4375, 2465.8125, 2573.9375],
[--, 2041.9375, 2239.3125, 2363.1875],
[--, 1846.3125, 1915.4375, 1999.5625],
[--, 1864.25, 1902.25, 1986.25],
[--, 2095.75, 2199.75, 2323.25],
[--, 2446.4375, 2594.3125, 2740.0],
[2712.0625, 2706.125, 2755.875, 2829.625],
[--, --, --, --]],
[[--, --, --, 2423.0],
[--, --, --, 2773.0],
[--, --, 3067.0, 3142.0],
[--, 3073.0, 2661.0, 2661.0],
[--, 2783.0, 2470.0, 2470.0],
[--, 2878.0, 2832.0, 2832.0],
[--, 2522.0, 2702.0, 2702.0],
[--, 2509.0, 2066.0, 2066.0],
[--, 2367.0, 1951.0, 1951.0],
[--, 2114.0, 2273.0, 2273.0],
[--, 2232.0, 2852.0, 2852.0],
[--, 2374.0, 2863.0, 2763.0],
[--, 2417.0, 2911.0, 2896.0],
[--, 1781.0, 2116.0, 2314.0],
[--, 1510.0, 1470.0, 1608.0],
[--, 1742.0, 1666.0, 1537.0],
[--, 1693.0, 2174.0, 2359.0],
[--, 2164.0, 2284.0, 3173.0],
[2413.0, 2697.0, 2470.0, 2996.0],
[--, --, --, --]],
[[--, --, --, 2957.6875],
[--, --, --, 2948.9375],
[--, --, 3192.4375, 3034.8125],
[--, 3257.875, 3066.125, 3066.125],
[--, 3259.125, 3042.875, 3042.875],
[--, 3059.3125, 2893.4375, 2893.4375],
[--, 2869.3125, 2758.9375, 2758.9375],
[--, 2622.9375, 2581.8125, 2581.8125],
[--, 2501.875, 2565.125, 2565.125],
[--, 2506.125, 2708.875, 2708.875],
[--, 2545.875, 2840.625, 2840.625],
[--, 2550.3125, 2882.9375, 2940.5625],
[--, 2333.4375, 2608.3125, 2732.6875],
[--, 2132.5, 2334.0, 2468.375],
[--, 1947.5, 2060.0, 2147.625],
[--, 1927.625, 2038.375, 2131.6875],
[--, 2072.875, 2269.125, 2420.5625],
[--, 2489.75, 2772.25, 2965.875],
[2729.0, 2768.375, 2975.125, 3101.375],
[--, --, --, --]]],
mask=[[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]],
[[ True, True, True, False],
[ True, True, True, False],
[ True, True, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[ True, False, False, False],
[False, False, False, False],
[ True, True, True, True]]],
fill_value=-3.4e+38,
dtype=float32)
# Assuming 'shapefile' is a GeoDataFrame that has been properly read and is in the correct CRS
geoms = shapefile.geometry.values # This will give us a numpy array of geometry objects
X = np.array([], dtype=np.float32).reshape(0, band_count) # Adjust dtype and band_count as needed
y = []
with rasterio.open(img_fp) as src:
for geom in geoms: # Loop through each geometry in the array
feature = [mapping(geom)] # Convert to format expected by rasterio.mask.mask
out_image, out_transform = mask(src, feature, crop=True)
# Check for pixels that are not nodata (neither 0 nor 255 in all bands)
if out_image.any(): # If there's any non-nodata pixel
# Filter out the nodata pixels and reshape
out_image_reshaped = out_image[:, (out_image[0] != nodata) & (out_image[0] != 0) & (out_image[0] != 255)].reshape(-1, band_count)
# Now, we need to get the corresponding class for each geometry
class_label = shapefile.loc[shapefile.geometry == geom, 'class_2018'].values[0]
# Extend the X and y arrays
X = np.vstack((X, out_image_reshaped)) # Stack the pixels
y.extend([class_label] * out_image_reshaped.shape[0]) # Extend the labels
# What are our classification labels?
labels = np.unique(shapefile["class_2018"])
print('The training data include {n} classes: {classes}\n'.format(n=labels.size,
classes=labels))
# We will need a "X" matrix containing our features, and a "y" array containing our labels
print('Our X matrix is sized: {sz}'.format(sz=X.shape))
print('Our y array is sized: {sz}'.format(sz=np.array(y).shape))
The training data include 21 classes: ['Airports' 'Arable land (annual crops)' 'Construction sites' 'Continuous urban fabric (S.L. : > 80%)' 'Discontinuous dense urban fabric (S.L. : 50% - 80%)' 'Discontinuous low density urban fabric (S.L. : 10% - 30%)' 'Discontinuous medium density urban fabric (S.L. : 30% - 50%)' 'Discontinuous very low density urban fabric (S.L. : < 10%)' 'Fast transit roads and associated land' 'Forests' 'Green urban areas' 'Herbaceous vegetation associations (natural grassland, moors...)' 'Industrial, commercial, public, military and private units' 'Isolated structures' 'Land without current use' 'Mineral extraction and dump sites' 'Other roads and associated land' 'Pastures' 'Railways and associated land' 'Sports and leisure facilities' 'Water'] Our X matrix is sized: (188286, 8) Our y array is sized: (188286,)
fig, ax = plt.subplots(figsize=[13, 6])
# Numbers 1-8 for bands
band_count = np.arange(1, 9)
# Convert y to numpy array for indexing
y_array = np.array(y)
# Iterate over unique classes
classes = np.unique(y_array)
for class_type in classes:
# Calculate mean reflectance value for each band for the current class
band_intensity = np.mean(X[y_array == class_type, :], axis=0)
# Plot the mean reflectance values on the single plot
ax.plot(band_count, band_intensity, label=class_type)
# Add axis labels
ax.set_xlabel('Band #')
ax.set_ylabel('Reflectance Value')
# Add a title
ax.set_title('Band Intensities Full Overview')
# Add legend outside of the plot
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
# Adjust layout to accommodate the legend
plt.tight_layout()
# Display the plot
plt.show()
code_2018
11100 [Continuous urban fabric (S.L. : > 80%)]
11210 [Discontinuous dense urban fabric (S.L. : 50% ...
11220 [Discontinuous medium density urban fabric (S....
11230 [Discontinuous low density urban fabric (S.L. ...
11240 [Discontinuous very low density urban fabric (...
11300 [Isolated structures]
12100 [Industrial, commercial, public, military and ...
12210 [Fast transit roads and associated land]
12220 [Other roads and associated land]
12230 [Railways and associated land]
12400 [Airports]
13100 [Mineral extraction and dump sites]
13300 [Construction sites]
13400 [Land without current use]
14100 [Green urban areas]
14200 [Sports and leisure facilities]
21000 [Arable land (annual crops)]
23000 [Pastures]
31000 [Forests]
32000 [Herbaceous vegetation associations (natural g...
50000 [Water]
Name: class_2018, dtype: object
The training data include 21 classes: [
'Airports'
'Arable land (annual crops)'
'Construction sites'
'Continuous urban fabric (S.L. : > 80%)'
'Discontinuous dense urban fabric (S.L. : 50% - 80%)'
'Discontinuous low density urban fabric (S.L. : 10% - 30%)'
'Discontinuous medium density urban fabric (S.L. : 30% - 50%)'
'Discontinuous very low density urban fabric (S.L. : < 10%)'
'Fast transit roads and associated land'
'Forests'
'Green urban areas'
'Herbaceous vegetation associations (natural grassland, moors...)'
'Industrial, commercial, public, military and private units'
'Isolated structures'
'Land without current use'
'Mineral extraction and dump sites'
'Other roads and associated land'
'Pastures'
'Railways and associated land'
'Sports and leisure facilities'
'Water']
def str_class_to_int(class_array):
class_array[class_array == 'Airports'] = 0
class_array[class_array == 'Arable land (annual crops)'] = 1
class_array[class_array == 'Construction sites'] = 2
class_array[class_array == 'Continuous urban fabric (S.L. : > 80%)'] = 3
class_array[class_array == 'Discontinuous dense urban fabric (S.L. : 50% - 80%)'] = 4
class_array[class_array == 'Discontinuous low density urban fabric (S.L. : 10% - 30%)'] = 5
class_array[class_array == 'Discontinuous medium density urban fabric (S.L. : 30% - 50%)'] = 6
class_array[class_array == 'Discontinuous very low density urban fabric (S.L. : < 10%)'] = 7
class_array[class_array == 'Fast transit roads and associated land'] = 8
class_array[class_array == 'Forests'] = 9
class_array[class_array == 'Green urban areas'] = 10
class_array[class_array == 'Herbaceous vegetation associations (natural grassland, moors...)'] = 11
class_array[class_array == 'Industrial, commercial, public, military and private units'] = 12
class_array[class_array == 'Isolated structures'] = 13
class_array[class_array == 'Land without current use'] = 14
class_array[class_array == 'Mineral extraction and dump sites'] = 15
class_array[class_array == 'Other roads and associated land'] = 16
class_array[class_array == 'Pastures'] = 17
class_array[class_array == 'Railways and associated land'] = 18
class_array[class_array == 'Sports and leisure facilities'] = 19
class_array[class_array == 'Water'] = 20
return(class_array.astype(int))
GaussianNB Naive Bayes¶
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X, y)
GaussianNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.windows import Window
from rasterio.plot import reshape_as_raster, reshape_as_image
Number of Bands: 8
Height: 450 pixels
Width: 1150 pixels
(8, 450, 1150)
(450, 1150, 8)
[:, 150:600, 250:1400] slices the array:
The : means all bands are included.
150:600 slices the rows (height) of the image.
250:1400 slices the columns (width) of the image.
img_data = src.read()
# Select specific bands (7, 3, 2) and the desired row and column ranges
# Adjust indices by subtracting 1 from the band number
selected_bands_img = img_data[[6, 2, 1], 150:600, 250:1400]
# Now, selected_bands_img.shape will give you the dimensions (number_of_selected_bands, height, width)
num_selected_bands, height, width = selected_bands_img.shape
with rasterio.open(img_fp) as src:
# may need to reduce this image size if your kernel crashes, takes a lot of memory
img = src.read()[:, 400:950, 450:1500]
# Take our full image and reshape into long 2d array (nrow * ncol, nband) for classification
print(img.shape)
reshaped_img = reshape_as_image(img)
print(reshaped_img.shape)
(8, 550, 1050) (550, 1050, 8)
class_prediction = gnb.predict(reshaped_img.reshape(-1, 8))
# Reshape our classification map back into a 2D matrix so we can visualize it
class_prediction = class_prediction.reshape(reshaped_img[:, :, 0].shape)
class_prediction = str_class_to_int(class_prediction)
def color_stretch(image, index):
colors = image[:, :, index].astype(np.float64)
for b in range(colors.shape[2]):
colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
return colors
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))
# next setup a colormap for our map
colors = dict((
(0, (255, 215, 0, 255)), # Airports - Gold
(1, (34, 139, 34, 255)), # Arable Land - Forest Green
(2, (255, 69, 0, 255)), # Construction - Orange Red
(3, (220, 20, 60, 255)), # Continuous Urban Fabric - Crimson
(4, (0, 191, 255, 255)), # Discontinuous Dense Urban Fabric - Deep Sky Blue
(5, (148, 0, 211, 255)), # Discontinuous Low Density Urban Fabric - Dark Violet
(6, (255, 140, 0, 255)), # Discontinuous Medium Density Urban Fabric - Dark Orange
(7, (32, 178, 170, 255)), # Discontinuous Very Low Density Urban Fabric - Light Sea Green
(8, (128, 0, 128, 255)), # Fast Transit Roads and Associated Land - Purple
(9, (34, 139, 34, 255)), # Forests - Forest Green
(10, (107, 142, 35, 255)), # Green Urban Areas - Olive Drab
(11, (154, 205, 50, 255)), # Herbaceous Vegetation Associations - Yellow Green
(12, (178, 34, 34, 255)), # Industrial, Commercial, Public, Military and Private Units - Firebrick
(13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
(14, (0, 128, 128, 255)), # Land without Current Use - Teal
(15, (165, 42, 42, 255)), # Mineral Extraction and Dump Sites - Brown
(16, (85, 107, 47, 255)), # Other Roads and Associated Land - Dark Olive Green
(17, (255, 99, 71, 255)), # Pastures - Tomato
(18, (0, 0, 128, 255)), # Railways and Associated Land - Navy
(19, (72, 61, 139, 255)), # Sports and Leisure Facilities - Dark Slate Blue
(20, (70, 130, 180, 255)), # Water - Steel Blue
))
# Put 0 - 255 as float 0 - 1
for k in colors:
v = colors[k]
_v = [_v / 255.0 for _v in v]
colors[k] = _v
index_colors = [colors[key] if key in colors else
(255, 255, 255, 0) for key in range(0, n+1)]
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap
# Define the class labels and their corresponding colors
class_labels = {
0: "Airports",
1: "Arable Land",
2: "Construction",
3: "Continuous Urban Fabric",
4: "Discontinuous Dense Urban Fabric",
5: "Discontinuous Low Density Urban Fabric",
6: "Discontinuous Medium Density Urban Fabric",
7: "Discontinuous Very Low Density Urban Fabric",
8: "Fast Transit Roads and Associated Land",
9: "Forests",
10: "Green Urban Areas",
11: "Herbaceous Vegetation Associations",
12: "Industrial, Commercial, Public, Military and Private Unit",
13: "Isolated Structures",
14: "Land without Current Use",
15: "Mineral Extraction and Dump Sites",
16: "Other Roads and Associated Land",
17: "Pastures",
18: "Railways and Associated Land",
19: "Sports and Leisure Facilities",
20: "Water",
}
patches = [mpatches.Patch(color=colors[key], label=class_labels[key]) for key in class_labels]
fig, axs = plt.subplots(2,1,figsize=(20,15))
img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)
axs[1].imshow(class_prediction, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17152\2740226285.py:39: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown fig.show()
with rasterio.open(img_fp) as src:
green_band = src.read(3)
red_band = src.read(4)
nir_band = src.read(8)
ndwi = (green_band.astype(float) - nir_band.astype(float)) / (green_band.astype(float) + nir_band.astype(float))
ndvi = (nir_band.astype(float) - red_band.astype(float)) / (red_band.astype(float) + nir_band.astype(float))
ndwi = ndwi[150:600, 250:1400]
ndvi = ndvi[150:600, 250:1400]
fig, axs = plt.subplots(2,2,figsize=(15,7))
img_stretched = color_stretch(reshaped_img, [3, 2, 1])
axs[0,0].imshow(img_stretched)
axs[0,1].imshow(class_prediction, cmap=cmap, interpolation='none')
nwdi_plot = axs[1,0].imshow(ndwi, cmap="RdYlGn")
axs[1,0].set_title("NDWI")
fig.colorbar(nwdi_plot, ax=axs[1,0])
ndvi_plot = axs[1,1].imshow(ndvi, cmap="RdYlGn")
axs[1,1].set_title("NDVI")
fig.colorbar(ndvi_plot, ax=axs[1,1])
plt.show()
fig, axs = plt.subplots(1,2,figsize=(15,15))
img_stretched = color_stretch(reshaped_img, [3, 2, 1])
axs[0].imshow(img_stretched[0:180, 160:350])
axs[1].imshow(class_prediction[0:180, 160:350], cmap=cmap, interpolation='none')
fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17152\1670099218.py:8: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown fig.show()
Random Forest¶
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import rasterio
import matplotlib.pyplot as plt
# Create the Random Forest model
rf = RandomForestClassifier()
# Train the model
rf.fit(X, y)
RandomForestClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
with rasterio.open(img_fp) as src:
img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)
# Predict the classes using the Random Forest model
class_prediction = rf.predict(reshaped_img.reshape(-1, 8))
# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
def color_stretch(image, index):
colors = image[:, :, index].astype(np.float64)
for b in range(colors.shape[2]):
colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
return colors
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))
# next setup a colormap for our map
colors = dict((
(0, (255, 215, 0, 255)), # Airports - Gold
(1, (34, 139, 34, 255)), # Arable Land - Forest Green
(2, (255, 69, 0, 255)), # Construction - Orange Red
(3, (220, 20, 60, 255)), # Continuous Urban Fabric - Crimson
(4, (0, 191, 255, 255)), # Discontinuous Dense Urban Fabric - Deep Sky Blue
(5, (148, 0, 211, 255)), # Discontinuous Low Density Urban Fabric - Dark Violet
(6, (255, 140, 0, 255)), # Discontinuous Medium Density Urban Fabric - Dark Orange
(7, (32, 178, 170, 255)), # Discontinuous Very Low Density Urban Fabric - Light Sea Green
(8, (128, 0, 128, 255)), # Fast Transit Roads and Associated Land - Purple
(9, (34, 139, 34, 255)), # Forests - Forest Green
(10, (107, 142, 35, 255)), # Green Urban Areas - Olive Drab
(11, (154, 205, 50, 255)), # Herbaceous Vegetation Associations - Yellow Green
(12, (178, 34, 34, 255)), # Industrial, Commercial, Public, Military and Private Units - Firebrick
(13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
(14, (0, 128, 128, 255)), # Land without Current Use - Teal
(15, (165, 42, 42, 255)), # Mineral Extraction and Dump Sites - Brown
(16, (85, 107, 47, 255)), # Other Roads and Associated Land - Dark Olive Green
(17, (255, 99, 71, 255)), # Pastures - Tomato
(18, (0, 0, 128, 255)), # Railways and Associated Land - Navy
(19, (72, 61, 139, 255)), # Sports and Leisure Facilities - Dark Slate Blue
(20, (70, 130, 180, 255)), # Water - Steel Blue
))
# Put 0 - 255 as float 0 - 1
for k in colors:
v = colors[k]
_v = [_v / 255.0 for _v in v]
colors[k] = _v
index_colors = [colors[key] if key in colors else
(255, 255, 255, 0) for key in range(0, n+1)]
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap
# Assuming reshaped_img is in the correct format
expected_shape = reshaped_img[:, :, 0].shape
class_prediction_2d = class_prediction.reshape(expected_shape)
fig, axs = plt.subplots(2, 1, figsize=(20, 15))
img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)
# Use the reshaped 2D array for visualization
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
plt.show()
KMeans¶
from sklearn.cluster import KMeans
bands, rows, cols = img.shape
k = 21 # num of clusters
kmeans_predictions = KMeans(n_clusters=k, random_state=0).fit(reshaped_img.reshape(-1, 8))
kmeans_predictions_2d = kmeans_predictions.labels_.reshape(rows, cols)
# Now show the classmap next to the image
fig, axs = plt.subplots(1,2,figsize=(15,8))
img_stretched = color_stretch(reshaped_img, [7, 3, 2]) #try different band combinations
axs[0].imshow(img_stretched)
axs[1].imshow(kmeans_predictions_2d)
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)
<matplotlib.image.AxesImage at 0x2935337f310>
KNeighbours¶
from sklearn.neighbors import KNeighborsClassifier
# Create the KNN model
knn = KNeighborsClassifier()
# Train the model
knn.fit(X, y)
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
with rasterio.open(img_fp) as src:
img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)
# Predict the classes using the KNN model
class_prediction = knn.predict(reshaped_img.reshape(-1, 8))
# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
def color_stretch(image, index):
colors = image[:, :, index].astype(np.float64)
for b in range(colors.shape[2]):
colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
return colors
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))
# next setup a colormap for our map
colors = dict((
(0, (255, 215, 0, 255)), # Airports - Gold
(1, (34, 139, 34, 255)), # Arable Land - Forest Green
(2, (255, 69, 0, 255)), # Construction - Orange Red
(3, (220, 20, 60, 255)), # Continuous Urban Fabric - Crimson
(4, (0, 191, 255, 255)), # Discontinuous Dense Urban Fabric - Deep Sky Blue
(5, (148, 0, 211, 255)), # Discontinuous Low Density Urban Fabric - Dark Violet
(6, (255, 140, 0, 255)), # Discontinuous Medium Density Urban Fabric - Dark Orange
(7, (32, 178, 170, 255)), # Discontinuous Very Low Density Urban Fabric - Light Sea Green
(8, (128, 0, 128, 255)), # Fast Transit Roads and Associated Land - Purple
(9, (34, 139, 34, 255)), # Forests - Forest Green
(10, (107, 142, 35, 255)), # Green Urban Areas - Olive Drab
(11, (154, 205, 50, 255)), # Herbaceous Vegetation Associations - Yellow Green
(12, (178, 34, 34, 255)), # Industrial, Commercial, Public, Military and Private Units - Firebrick
(13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
(14, (0, 128, 128, 255)), # Land without Current Use - Teal
(15, (165, 42, 42, 255)), # Mineral Extraction and Dump Sites - Brown
(16, (85, 107, 47, 255)), # Other Roads and Associated Land - Dark Olive Green
(17, (255, 99, 71, 255)), # Pastures - Tomato
(18, (0, 0, 128, 255)), # Railways and Associated Land - Navy
(19, (72, 61, 139, 255)), # Sports and Leisure Facilities - Dark Slate Blue
(20, (70, 130, 180, 255)), # Water - Steel Blue
))
# Put 0 - 255 as float 0 - 1
for k in colors:
v = colors[k]
_v = [_v / 255.0 for _v in v]
colors[k] = _v
index_colors = [colors[key] if key in colors else
(255, 255, 255, 0) for key in range(0, n+1)]
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
fig, axs = plt.subplots(2, 1, figsize=(20, 15))
img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)
class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
plt.show()
Decision Tree¶
from sklearn.tree import DecisionTreeClassifier
# Create the Decision Tree model
dt = DecisionTreeClassifier()
# Train the model
dt.fit(X, y)
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
# Read and reshape the image
with rasterio.open(img_fp) as src:
img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)
# Predict the classes using the Decision Tree model
class_prediction = dt.predict(reshaped_img.reshape(-1, 8))
# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
fig, axs = plt.subplots(2, 1, figsize=(20, 15))
img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)
class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
plt.show()
from sklearn.gaussian_process import GaussianProcessClassifier
# Create the Gaussian Process model
gp = GaussianProcessClassifier()
from sklearn.model_selection import train_test_split
# Suppose you want to use 20% of your data
X_subset, _, y_subset, _ = train_test_split(X, y, test_size=0.9, random_state=42)
# Due to computational intensity, consider using a subset of data or reducing the resolution
# Train the model
gp.fit(X_subset, y_subset)
--------------------------------------------------------------------------- MemoryError Traceback (most recent call last) Cell In[153], line 3 1 # Due to computational intensity, consider using a subset of data or reducing the resolution 2 # Train the model ----> 3 gp.fit(X_subset, y_subset) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs) 1145 estimator._validate_params() 1147 with config_context( 1148 skip_parameter_validation=( 1149 prefer_skip_nested_validation or global_skip_validation 1150 ) 1151 ): -> 1152 return fit_method(estimator, *args, **kwargs) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:741, in GaussianProcessClassifier.fit(self, X, y) 738 else: 739 raise ValueError("Unknown multi-class mode %s" % self.multi_class) --> 741 self.base_estimator_.fit(X, y) 743 if self.n_classes_ > 2: 744 self.log_marginal_likelihood_value_ = np.mean( 745 [ 746 estimator.log_marginal_likelihood() 747 for estimator in self.base_estimator_.estimators_ 748 ] 749 ) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs) 1145 estimator._validate_params() 1147 with config_context( 1148 skip_parameter_validation=( 1149 prefer_skip_nested_validation or global_skip_validation 1150 ) 1151 ): -> 1152 return fit_method(estimator, *args, **kwargs) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\multiclass.py:339, in OneVsRestClassifier.fit(self, X, y) 335 columns = (col.toarray().ravel() for col in Y.T) 336 # In cases where individual estimators are very fast to train setting 337 # n_jobs > 1 in can results in slower performance due to the overhead 338 # of spawning threads. See joblib issue #112. --> 339 self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( 340 delayed(_fit_binary)( 341 self.estimator, 342 X, 343 column, 344 classes=[ 345 "not %s" % self.label_binarizer_.classes_[i], 346 self.label_binarizer_.classes_[i], 347 ], 348 ) 349 for i, column in enumerate(columns) 350 ) 352 if hasattr(self.estimators_[0], "n_features_in_"): 353 self.n_features_in_ = self.estimators_[0].n_features_in_ File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\utils\parallel.py:65, in Parallel.__call__(self, iterable) 60 config = get_config() 61 iterable_with_config = ( 62 (_with_config(delayed_func, config), args, kwargs) 63 for delayed_func, args, kwargs in iterable 64 ) ---> 65 return super().__call__(iterable_with_config) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\parallel.py:1863, in Parallel.__call__(self, iterable) 1861 output = self._get_sequential_output(iterable) 1862 next(output) -> 1863 return output if self.return_generator else list(output) 1865 # Let's create an ID that uniquely identifies the current call. If the 1866 # call is interrupted early and that the same instance is immediately 1867 # re-used, this id will be used to prevent workers that were 1868 # concurrently finalizing a task from the previous call to run the 1869 # callback. 1870 with self._lock: File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\parallel.py:1792, in Parallel._get_sequential_output(self, iterable) 1790 self.n_dispatched_batches += 1 1791 self.n_dispatched_tasks += 1 -> 1792 res = func(*args, **kwargs) 1793 self.n_completed_tasks += 1 1794 self.print_progress() File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\utils\parallel.py:127, in _FuncWrapper.__call__(self, *args, **kwargs) 125 config = {} 126 with config_context(**config): --> 127 return self.function(*args, **kwargs) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\multiclass.py:90, in _fit_binary(estimator, X, y, classes) 88 else: 89 estimator = clone(estimator) ---> 90 estimator.fit(X, y) 91 return estimator File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:256, in _BinaryGaussianProcessClassifierLaplace.fit(self, X, y) 254 self.log_marginal_likelihood_value_ = -np.min(lml_values) 255 else: --> 256 self.log_marginal_likelihood_value_ = self.log_marginal_likelihood( 257 self.kernel_.theta 258 ) 260 # Precompute quantities required for predictions which are independent 261 # of actual query points 262 K = self.kernel_(self.X_train_) File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:385, in _BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood(self, theta, eval_gradient, clone_kernel) 381 K = kernel(self.X_train_) 383 # Compute log-marginal-likelihood Z and also store some temporaries 384 # which can be reused for computing Z's gradient --> 385 Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True) 387 if not eval_gradient: 388 return Z File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:443, in _BinaryGaussianProcessClassifierLaplace._posterior_mode(self, K, return_temporaries) 441 W_sr = np.sqrt(W) 442 W_sr_K = W_sr[:, np.newaxis] * K --> 443 B = np.eye(W.shape[0]) + W_sr_K * W_sr 444 L = cholesky(B, lower=True) 445 # Line 6 MemoryError: Unable to allocate 2.64 GiB for an array with shape (18828, 18828) and data type float64
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95) # Retain 95% of the variance
X_pca = pca.fit_transform(X)
X_subset, _, y_subset, _ = train_test_split(X_pca, y, test_size=0.7, random_state=42)
from sklearn.svm import SVC
import numpy as np
import rasterio
import matplotlib.pyplot as plt
# Create the SVM model with RBF kernel
svm = SVC(kernel='rbf')
# Train the model
svm.fit(X, y)